* This is a minimally edited copy of parole.do from the Heyes and Saberian (2022) archive. (https://www.openicpsr.org/openicpsr/project/127263/version/V1/view?path=/openicpsr/127263/fcr:versions/V1/Errata_judge/Parole/parole.do&type=file)
* It has been modified to merge the new 2016-19 Board of Parole Hearings Data with contemporaneous weather and pollution data.
* It stores intermediate files in the "tmp" subdirectory and the final output, parole1619.dta, in the "replication" subdirectory.

* Move to the project folder. Two machine-specific locations are tried in turn (a macOS path, then a Windows path);
* "cap" suppresses the error from whichever path does not exist. If neither exists, the script carries on in the current directory.
cap cd "/Users/davidroodman/Library/CloudStorage/OneDrive-OpenPhilanthropyProject/Replication opinions/Heyes and Saberian 2019"
cap cd "D:\OneDrive - Open Philanthropy Project\Replication opinions\Heyes and Saberian 2019/"

* Process the raw NOAA GHCN-Daily summaries, used here only as the source of precipitation.
* Step 1: build a lookup of California stations (IDs starting with US) and their coordinates in a separate frame.
cap frame create stations  // "cap" so that an already-existing frame does not stop the script
cwf stations
import delimited "NOAA GHCNd/ghcnd-stations.csv", clear
ren (v1-v3 v5) (station latitude longitude state)
keep if state=="CA" & substr(station,1,2)=="US"
keep station latitude longitude
cwf default
* Step 2: for each year, keep the PRCP records of those stations, attach coordinates, and save to tmp/prcpYYYY.
forvalues y=2016/2019 {
  * The closing quote of the year macro must sit directly after y, before ".csv";
  * otherwise Stata looks up an undefined macro and the file name collapses to the bare folder.
  import delimited "NOAA GHCNd/`y'.csv", stringcols(2) clear  // column 2 (the date) is read as a string so it can be parsed below
  ren (v1-v4) (station datestr element prcp)
  keep if element=="PRCP" & substr(station,1,2)=="US"
  frlink m:1 station, frame(stations)
  drop if mi(stations)  // link variable is missing for stations not in the California lookup
  frget latitude longitude, from(stations)
  gen date = date(datestr, "YMD")
  keep station date prcp latitude longitude
  save tmp/prcp`y', replace
}


* Wind: for each year, pair every hearing institution with the EPA AQS wind monitor-days and
* keep, per institution and date, the first record after ordering by distance.
foreach src in daily_WIND_2016 daily_WIND_2017 daily_WIND_2018 daily_WIND_2019 {

  import delimited "EPA AQS/`src'.csv", clear

  * discard wind-direction records and the two vector-summation methods
  drop if parametername=="Wind Direction - Resultant" | inlist(methodname, "INSTRUMENTAL - VECTOR SUMMATION LEVEL 3", "INSTRUMENTAL - VECTOR SUMMATION LEVEL 2")

  * original note: the rest are forecast based
  keep if poc==1

  generate date = date(datelocal, "YMD")
  format date %td

  * one record per monitor location and day
  duplicates drop date latitude longitude, force

  keep arithmeticmean date latitude longitude
  rename arithmeticmean windmean
  generate type = 2

  * institutions come in without a type value and are tagged 1
  append using "CA BPH/insgps"
  replace type = 1 if type==.

  * park the monitor rows (all variables suffixed with 2) in a tempfile; keep the institution rows in memory
  tempfile monitors
  preserve
  keep if type==2
  rename * *2
  generate id2 = _n
  save "`monitors'"
  restore
  keep if type==1
  generate id1 = _n

  * every institution x monitor-day pair, with the distance between the two points (geodist options: sphere, mi)
  cross using "`monitors'"
  geodist latitude longitude latitude2 longitude2, gen(dwind) sphere mi

  keep hrginst windmean2 latitude2 longitude2 id1 dwind date2
  rename (windmean2 latitude2 longitude2 date2) (windmean latitude longitude date)

  * rows ordered by distance within institution, so the record kept per institution-date is the first in that order
  sort id1 dwind date
  duplicates drop id1 date, force

  save tmp/`src'distance, replace
}

* stack the four yearly files
use tmp/daily_WIND_2016distance, clear
append using tmp/daily_WIND_2017distance tmp/daily_WIND_2018distance tmp/daily_WIND_2019distance
save tmp/winddistance, replace


* Precipitation: for each year, pair every hearing institution with the NOAA station-days saved in tmp/prcpYYYY and
* keep, per institution and date, the first record after ordering by distance.
foreach file in prcp2016 prcp2017 prcp2018 prcp2019 {

use tmp/`file', clear

* One record per station location and day. The deduplicated data must stay in memory:
* previously the institutions file was loaded over it and the raw file re-appended, which discarded this step.
duplicates drop date latitude longitude, force
g type=2

* institutions come in without a type value and are tagged 1
append using "CA BPH/insgps"
replace type=1 if type==.

* split into station rows (all variables suffixed with 2, saved to a tempfile) and institution rows
tempfile main type2
save "`main'"
keep if type == 2
rename * *2
gen id2 = _n
save "`type2'"
use "`main'"
keep if type == 1
gen id1 = _n

* every institution x station-day pair, with the distance between the two points (geodist options: sphere, mi)
cross using "`type2'"
geodist latitude longitude latitude2 longitude2, gen(dprcp) sphere  mi

keep  hrginst prcp2 latitude2 longitude2 id1 dprcp date2
rename (prcp2 latitude2 longitude2 date2)(prcp latitude longitude date)
* rows ordered by distance within institution, so the record kept per institution-date is the first in that order
sort id1 dprcp date
duplicates drop id1 date, force


save tmp/`file'distance, replace

}

* stack the four yearly files
use tmp/prcp2019distance,clear
append using tmp/prcp2016distance tmp/prcp2017distance tmp/prcp2018distance
save tmp/prcpdistance, replace


* Temperature: for each year, pair every hearing institution with the EPA AQS temperature monitor-days,
* keep the first record per institution-date after ordering by distance, and add previous-day and next-day temperature.
foreach src in daily_TEMP_2016 daily_TEMP_2017 daily_TEMP_2018 daily_TEMP_2019 {

  import delimited "EPA AQS/`src'.csv", clear

  keep if poc==1

  generate date = date(datelocal, "YMD")
  format date %td

  * one record per monitor location and day
  duplicates drop date latitude longitude, force

  keep arithmeticmean date latitude longitude
  rename arithmeticmean tempmean
  generate type = 2

  * institutions come in without a type value and are tagged 1
  append using "CA BPH/insgps"
  replace type = 1 if type==.

  * park the monitor rows (all variables suffixed with 2) in a tempfile; keep the institution rows in memory
  tempfile monitors
  preserve
  keep if type==2
  rename * *2
  generate id2 = _n
  save "`monitors'"
  restore
  keep if type==1
  generate id1 = _n

  * every institution x monitor-day pair, with the distance between the two points (geodist options: sphere, mi)
  cross using "`monitors'"
  geodist latitude longitude latitude2 longitude2, gen(dtemp) sphere  mi

  keep hrginst tempmean2 latitude2 longitude2 id1 dtemp date2
  rename (tempmean2 latitude2 longitude2 date2) (tempmean latitude longitude date)

  * rows ordered by distance within institution, so the record kept per institution-date is the first in that order
  sort id1 dtemp date
  duplicates drop id1 date, force

  * Lag (ltemp) and lead (letemp) of temperature, taken from the adjacent row within the institution.
  * NOTE(review): rows are presumably still in id1, dtemp, date order here (confirm that duplicates drop leaves row
  * order intact), so the adjacent row is not always the adjacent calendar day; the if-condition leaves the value
  * missing in that case. Files are processed one year at a time, so no lag/lead crosses a year boundary.
  by id1: g ltemp= tempmean[_n-1] if date==date[_n-1] + 1
  by id1: g letemp= tempmean[_n+1] if date==date[_n+1] - 1

  save tmp/`src'distance, replace
}

* stack the four yearly files
use tmp/daily_TEMP_2019distance
append using tmp/daily_TEMP_2016distance tmp/daily_TEMP_2017distance tmp/daily_TEMP_2018distance
save tmp/tempdistance, replace

* Barometric pressure: for each year, pair every hearing institution with the EPA AQS pressure monitor-days and
* keep, per institution and date, the first record after ordering by distance.
foreach src in daily_PRESS_2016 daily_PRESS_2017 daily_PRESS_2018 daily_PRESS_2019 {

  import delimited "EPA AQS/`src'.csv", clear

  keep if poc==1

  generate date = date(datelocal, "YMD")
  format date %td

  * one record per monitor location and day
  duplicates drop date latitude longitude, force

  keep arithmeticmean date latitude longitude
  rename arithmeticmean pressmean
  generate type = 2

  * institutions come in without a type value and are tagged 1
  append using "CA BPH/insgps"
  replace type = 1 if type==.

  * park the monitor rows (all variables suffixed with 2) in a tempfile; keep the institution rows in memory
  tempfile monitors
  preserve
  keep if type==2
  rename * *2
  generate id2 = _n
  save "`monitors'"
  restore
  keep if type==1
  generate id1 = _n

  * every institution x monitor-day pair, with the distance between the two points (geodist options: sphere, mi)
  cross using "`monitors'"
  geodist latitude longitude latitude2 longitude2, gen(dpress) sphere  mi

  keep hrginst pressmean2 latitude2 longitude2 id1 dpress date2
  rename (pressmean2 latitude2 longitude2 date2) (pressmean latitude longitude date)

  * rows ordered by distance within institution, so the record kept per institution-date is the first in that order
  sort id1 dpress date
  duplicates drop id1 date, force

  save tmp/`src'distance, replace
}

* stack the four yearly files
use tmp/daily_PRESS_2019distance
append using tmp/daily_PRESS_2016distance tmp/daily_PRESS_2017distance tmp/daily_PRESS_2018distance
save tmp/pressdistance, replace


* Relative humidity: for each year, pair every hearing institution with the EPA AQS humidity monitor-days and
* keep, per institution and date, the first record after ordering by distance.
foreach src in daily_RH_DP_2016 daily_RH_DP_2017 daily_RH_DP_2018 daily_RH_DP_2019 {

  import delimited "EPA AQS/`src'.csv", clear

  * the file holds more than one parameter; the value matched here ends in a space
  keep if parametername=="Relative Humidity "
  keep if poc==1

  generate date = date(datelocal, "YMD")
  format date %td

  * one record per monitor location and day
  duplicates drop date latitude longitude, force

  keep arithmeticmean date latitude longitude
  rename arithmeticmean rhmean
  generate type = 2

  * institutions come in without a type value and are tagged 1
  append using "CA BPH/insgps"
  replace type = 1 if type==.

  * park the monitor rows (all variables suffixed with 2) in a tempfile; keep the institution rows in memory
  tempfile monitors
  preserve
  keep if type==2
  rename * *2
  generate id2 = _n
  save "`monitors'"
  restore
  keep if type==1
  generate id1 = _n

  * every institution x monitor-day pair, with the distance between the two points (geodist options: sphere, mi)
  cross using "`monitors'"
  geodist latitude longitude latitude2 longitude2, gen(drh) sphere  mi

  keep hrginst rhmean2 latitude2 longitude2 id1 drh date2
  rename (rhmean2 latitude2 longitude2 date2) (rhmean latitude longitude date)

  * rows ordered by distance within institution, so the record kept per institution-date is the first in that order
  sort id1 drh date
  duplicates drop id1 date, force

  save tmp/`src'distance, replace
}

* stack the four yearly files
use tmp/daily_RH_DP_2019distance
append using tmp/daily_RH_DP_2016distance tmp/daily_RH_DP_2017distance tmp/daily_RH_DP_2018distance 
save tmp/rhdistance, replace


* Carbon monoxide (AQS files 42101): for each year, pair every hearing institution with the monitor-days and
* keep, per institution and date, the first record after ordering by distance.
foreach src in daily_42101_2016 daily_42101_2017 daily_42101_2018 daily_42101_2019 {

  import delimited "EPA AQS/`src'.csv", clear

  generate date = date(datelocal, "YMD")
  format date %td

  * only the 8-hour running-average records, and only POC 1
  keep if sampleduration=="8-HR RUN AVG END HOUR" & poc==1

  * one record per monitor location and day
  duplicates drop date latitude longitude, force

  keep arithmeticmean date latitude longitude
  rename arithmeticmean comean
  generate type = 2

  * institutions come in without a type value and are tagged 1
  append using "CA BPH/insgps"
  replace type = 1 if type==.

  * park the monitor rows (all variables suffixed with 2) in a tempfile; keep the institution rows in memory
  tempfile monitors
  preserve
  keep if type==2
  rename * *2
  generate id2 = _n
  save "`monitors'"
  restore
  keep if type==1
  generate id1 = _n

  * every institution x monitor-day pair, with the distance between the two points (geodist options: sphere, mi)
  cross using "`monitors'"
  geodist latitude longitude latitude2 longitude2, gen(dco) sphere  mi

  keep hrginst comean2 latitude2 longitude2 id1 dco date2
  rename (comean2 latitude2 longitude2 date2) (comean latitude longitude date)

  * rows ordered by distance within institution, so the record kept per institution-date is the first in that order
  sort id1 dco date
  duplicates drop id1 date, force

  save tmp/`src'distance, replace
}

* stack the four yearly files
use tmp/daily_42101_2016distance
append using tmp/daily_42101_2017distance tmp/daily_42101_2018distance tmp/daily_42101_2019distance
save tmp/codistance, replace


* Ozone (AQS files 44201): for each year, pair every hearing institution with the monitor-days and
* keep, per institution and date, the first record after ordering by distance.
foreach src in daily_44201_2016 daily_44201_2017 daily_44201_2018 daily_44201_2019 {

  import delimited "EPA AQS/`src'.csv", clear

  * only records with an observation count of exactly 17, and only POC 1
  keep if observationcount==17 & poc==1

  generate date = date(datelocal, "YMD")
  format date %td

  * one record per monitor location and day
  duplicates drop date latitude longitude, force

  keep arithmeticmean date latitude longitude
  rename arithmeticmean ozonemean
  generate type = 2

  * institutions come in without a type value and are tagged 1
  append using "CA BPH/insgps"
  replace type = 1 if type==.

  * park the monitor rows (all variables suffixed with 2) in a tempfile; keep the institution rows in memory
  tempfile monitors
  preserve
  keep if type==2
  rename * *2
  generate id2 = _n
  save "`monitors'"
  restore
  keep if type==1
  generate id1 = _n

  * every institution x monitor-day pair, with the distance between the two points (geodist options: sphere, mi)
  cross using "`monitors'"
  geodist latitude longitude latitude2 longitude2, gen(dozone) sphere  mi

  keep hrginst ozonemean2 latitude2 longitude2 id1 dozone date2
  rename (ozonemean2 latitude2 longitude2 date2) (ozonemean latitude longitude date)

  * rows ordered by distance within institution, so the record kept per institution-date is the first in that order
  sort id1 dozone date
  duplicates drop id1 date, force

  save tmp/`src'distance, replace
}

* stack the four yearly files
use tmp/daily_44201_2016distance
append using tmp/daily_44201_2017distance tmp/daily_44201_2018distance tmp/daily_44201_2019distance
save tmp/ozonedistance, replace

* Particulate matter (AQS files 88101): for each year, pair every hearing institution with the monitor-days and
* keep, per institution and date, the first record after ordering by distance.
* Unlike the sections that filter on poc==1, no POC or sample-duration filter is applied to this series.
foreach src in daily_88101_2016 daily_88101_2017 daily_88101_2018 daily_88101_2019 {

  import delimited "EPA AQS/`src'.csv", clear

  generate date = date(datelocal, "YMD")
  format date %td

  * one record per monitor location and day
  duplicates drop date latitude longitude, force

  keep arithmeticmean date latitude longitude
  rename arithmeticmean pm
  generate type = 2

  * institutions come in without a type value and are tagged 1
  append using "CA BPH/insgps"
  replace type = 1 if type==.

  * park the monitor rows (all variables suffixed with 2) in a tempfile; keep the institution rows in memory
  tempfile monitors
  preserve
  keep if type==2
  rename * *2
  generate id2 = _n
  save "`monitors'"
  restore
  keep if type==1
  generate id1 = _n

  * every institution x monitor-day pair, with the distance between the two points (geodist options: sphere, mi)
  cross using "`monitors'"
  geodist latitude longitude latitude2 longitude2, gen(dpm) sphere  mi

  keep hrginst pm2 latitude2 longitude2 id1 dpm date2
  rename (pm2 latitude2 longitude2 date2) (pm latitude longitude date)

  * rows ordered by distance within institution, so the record kept per institution-date is the first in that order
  sort id1 dpm date
  duplicates drop id1 date, force

  save tmp/`src'distance, replace
}

* stack the four yearly files
use tmp/daily_88101_2016distance
append using tmp/daily_88101_2017distance tmp/daily_88101_2018distance tmp/daily_88101_2019distance
save tmp/pmdistance, replace

* AQS files 42602 (stored as nomean; presumably nitrogen dioxide - confirm against the AQS parameter list):
* for each year, pair every hearing institution with the monitor-days and
* keep, per institution and date, the first record after ordering by distance.
* Unlike the sections that filter on poc==1, no POC or sample-duration filter is applied to this series.
foreach src in daily_42602_2016 daily_42602_2017 daily_42602_2018 daily_42602_2019 {

  import delimited "EPA AQS/`src'.csv", clear

  generate date = date(datelocal, "YMD")
  format date %td

  * one record per monitor location and day
  duplicates drop date latitude longitude, force

  keep arithmeticmean date latitude longitude
  rename arithmeticmean nomean
  generate type = 2

  * institutions come in without a type value and are tagged 1
  append using "CA BPH/insgps"
  replace type = 1 if type==.

  * park the monitor rows (all variables suffixed with 2) in a tempfile; keep the institution rows in memory
  tempfile monitors
  preserve
  keep if type==2
  rename * *2
  generate id2 = _n
  save "`monitors'"
  restore
  keep if type==1
  generate id1 = _n

  * every institution x monitor-day pair, with the distance between the two points (geodist options: sphere, mi)
  cross using "`monitors'"
  geodist latitude longitude latitude2 longitude2, gen(dno) sphere  mi

  keep hrginst nomean2 latitude2 longitude2 id1 dno date2
  rename (nomean2 latitude2 longitude2 date2) (nomean latitude longitude date)

  * rows ordered by distance within institution, so the record kept per institution-date is the first in that order
  sort id1 dno date
  duplicates drop id1 date, force

  save tmp/`src'distance, replace
}

* stack the four yearly files
use tmp/daily_42602_2016distance
append using tmp/daily_42602_2017distance tmp/daily_42602_2018distance tmp/daily_42602_2019distance
save tmp/nodistance, replace


* Combine all institution-by-date series into one file, starting from PM and adding the others one at a time.
* keep(1 2 3) retains matched and unmatched rows from both sides, so an institution-date present in any series survives.
use tmp/pmdistance, clear

foreach series in no co ozone rh press temp wind prcp {
  merge 1:1 date hrginst using tmp/`series'distance, nogen keep(1 2 3)
}

save tmp/weatherpollutiondistance, replace

* Read the Board of Parole Hearings workbook: one sheet per calendar year, each imported in a scratch frame,
* saved to a tempfile, and appended to the (initially empty) dataset in the default frame.
clear
cap frame create tempframe  // "cap" so that an already-existing frame does not stop the script
tempfile tempfile
forvalues y=2016/2019 {
  * variable names are taken from the first row of the range, which starts at A2
  frame tempframe: import excel "CA BPH/Hearings CY2016-CY2019.xlsx", sheet("CY `y'wCMR") firstrow cellrange(A2) clear
  * the tempfile path is quoted in case the temporary directory contains spaces
  frame tempframe: save "`tempfile'", replace
  append using "`tempfile'"
}
keep EventType LastName CDCnumber ScheduleDate Result Length GovernorReviewAuthority HearingInstitution Chair OtherPanelMember
ren (EventType LastName CDCnumber ScheduleDate Result Length GovernorReviewAuthority HearingInstitution Chair OtherPanelMember) ///
    (hearingtype inmatelastname cdcr date result length governorreview hrginst panelchair otherpanelmembers)
* attach institution coordinates; unmatched rows from either side are retained
merge m:1 hrginst using "CA BPH/insgps", nogen
gen year = year(date)
gen month = month(date)
* day of month must be taken from the hearing date; day(year) would treat the year number as a date
gen day = day(date)
save tmp/parole1619, replace


* Attach the weather and pollution series to the hearings (matched institution-dates only), build the outcome,
* grouping identifiers and rescaled temperature variables, and write the final replication file.
use tmp/weatherpollutiondistance, clear
merge 1:m date hrginst using tmp/parole1619, nogen keep(3)

* drop hearings whose temperature record lies more than 50 distance units away; a missing dtemp also satisfies > 50
drop if dtemp > 50

* outcome: 1 for Grant, 0 for Deny, missing for any other result
generate res = cond(result=="Grant", 1, cond(result=="Deny", 0, .))

generate week = week(date)

* string keys for the grouping variables
generate yearmonth     = string(year) + string(month)
generate instyear      = hrginst + string(year)
generate instweek      = hrginst + string(week)
generate instmonthyear = hrginst + string(year) + string(month)
generate instmonth     = hrginst + string(month)
generate chairmonth    = panelchair + string(month)

* numeric, value-labelled versions of the keys and of the inmate's last name
local sources yearmonth instyear instweek instmonthyear instmonth chairmonth inmatelastname
local targets ym insy insw inym insm chm name
forvalues i = 1/7 {
  local s : word `i' of `sources'
  local t : word `i' of `targets'
  encode `s', generate(`t')
}

generate dayofweek = dow(date)

* numeric, value-labelled versions of institution, panel members and hearing type
local sources hrginst panelchair otherpanelmembers hearingtype
local targets inst chair other type
forvalues i = 1/4 {
  local s : word `i' of `sources'
  local t : word `i' of `targets'
  encode `s', generate(`t')
}

* Temperature and its lag and lead are divided by 1000.
* NOTE(review): presumably so that a coefficient reads as percentage points per 10 degrees - confirm.
generate tempmean10 = tempmean/1000
foreach v of varlist ltemp letemp {
  replace `v' = `v'/1000
}

* keep only hearings with same-day, previous-day and next-day temperature
keep if tempmean10!=. & ltemp!=. & letemp!=.


// merge m:1 panelchair using gender, nogen keep(3)  // DR note: gender not used
//
// g female=0
// replace female=1 if gender=="female"

saveold replication/parole1619, replace ver(12)
